Survey Completion Rates by District
## Page 1
library(sf)
library(tidyverse)
## Import data
## District-level targets: read the first sheet of the workbook, keep only the
## rows that carry a district code, and round the coverage percentage.
table_by_district_gp <- xlsx::read.xlsx(
  file = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Output/district_targets.xlsx",
  sheetIndex = 1
) %>%
  filter(!is.na(DistrictCode)) %>%
  mutate(Percentage.GPs.Covered = round(Percentage.GPs.Covered))
## Block-level targets: read the first sheet of the workbook, keep only the
## rows that carry a district code, and round the coverage percentage.
table_by_block_gp <- xlsx::read.xlsx(
  file = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Output/block_targets.xlsx",
  sheetIndex = 1
) %>%
  filter(!is.na(DistrictCode)) %>%
  mutate(Percentage.GPs.Covered = round(Percentage.GPs.Covered))
# Import division data
# Keep only the district code (renamed to DistrictCode and stored as text) and
# the division label.
division_labels <- xlsx::read.xlsx(
  file = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Output/District_division_codes.xls",
  sheetIndex = 1
) %>%
  select(DistrictCode = District.Code, Division) %>%
  mutate(DistrictCode = as.character(DistrictCode))
## Add admin division
## right_join() keeps every row of the targets table and attaches the division
## label where the district code matches. The key is named explicitly so that a
## column later shared by both tables cannot silently join the condition.
table_by_block_gp <- right_join(
  division_labels,
  table_by_block_gp,
  by = "DistrictCode"
)
table_by_district_gp <- right_join(
  division_labels,
  table_by_district_gp,
  by = "DistrictCode"
)
## Import karnataka shape file
## Read the GADM level-2 layer and keep only the Karnataka districts.
karnataka_shp_files <- read_sf(
  dsn = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Data/Shp Files Dist/gadm36_IND_2.shp",
  layer = "gadm36_IND_2"
) %>%
  filter(NAME_1 == "Karnataka")

## Fix district names
## Lookup: upper-cased shapefile spelling -> spelling the district table is
## joined on. Two source spellings (KALBURGI, GULBARGA) map to KALABURAGI.
district_name_fixes <- c(
  "BAGALKOT" = "BAGALKOTE",
  "BANGALORE RURAL" = "BENGALURU RURAL",
  "BANGALORE" = "BENGALURU",
  "DHARWAD" = "DHARWAR",
  "KALBURGI" = "KALABURAGI",
  "KOLARA" = "KOLAR",
  "BELGAUM" = "BELAGAVI",
  "BELLARY" = "BALLARI",
  "BIJAPUR" = "VIJAYAPURA",
  "CHAMRAJNAGAR" = "CHAMARAJANAGARA",
  "CHIKBALLAPURA" = "CHIKKABALLAPURA",
  "GULBARGA" = "KALABURAGI",
  "MYSORE" = "MYSURU",
  "SHIMOGA" = "SHIVAMOGGA",
  "CHIKMAGALUR" = "CHIKKAMAGALURU",
  "TUMKUR" = "TUMAKURU"
)
karnataka_shp_files <- karnataka_shp_files %>%
  mutate(
    NAME_2 = toupper(NAME_2),
    # Names without an entry in the lookup come back as NA from the indexing
    # and therefore fall through to their original (upper-cased) value.
    NAME_2 = coalesce(unname(district_name_fixes[NAME_2]), NAME_2),
    DistrictName = NAME_2
  )

## Re-project to EPSG:4326, then attach the district-level survey table.
karnataka_shp_files <- st_transform(karnataka_shp_files, 4326)
karnataka_shp_files_district_response <- left_join(
  karnataka_shp_files,
  table_by_district_gp,
  by = "DistrictName"
)
## Division shp
## Merge the district geometries into one geometry per Division value.
karnataka_shp_files_district_response_div <- summarize(
  group_by(karnataka_shp_files_district_response, Division),
  geometry = st_union(geometry)
)
## Columns shown in the block-level table, in display order.
dat_block <- select(
  table_by_block_gp,
  DistrictName,
  BlockName,
  Total.GPs.in.the.Block,
  Total.GPs.covered.in.the.Block,
  Total.surveys.completed.in.the.Block,
  Percentage.GPs.Covered,
  Division
)
###
## District-level counterpart of dat_block.
dat_dist <- select(
  table_by_district_gp,
  DistrictName,
  Total.GPs.in.the.District,
  Total.GPs.covered.in.the.District,
  Total.surveys.completed.in.the.District,
  Percentage.GPs.Covered,
  Division
)
###
## crosstalk wrapper around the block table; widgets built on it share the
## "abSelector" group.
shared_dat_block <- SharedData$new(data = dat_block, group = "abSelector")
# shared_dat_dist <- SharedData$new(data=dat_dist,~DistrictName, group="abSelector")

## District map: fill encodes the percentage of GPs covered. Each polygon
## carries the district name as tooltip and data id, and its onclick string is
## a JavaScript call filterOn("<district name>").
p <- ggplot() +
  # geom_sf(data=karnataka_shp_files_district_response_div,aes(color=Division),alpha=1,stroke=2,lwd=2)+
  # scale_color_manual(values=c("darkorange","yellow","lightblue","darkgreen"))+
  geom_sf_interactive(
    data = karnataka_shp_files_district_response,
    aes(
      fill = Percentage.GPs.Covered,
      data_id = DistrictName,
      onclick = paste0("filterOn(\"", DistrictName, "\")"),
      tooltip = DistrictName
    ),
    alpha = .6
  )

## Base theme and colour / axis scales.
p <- p +
  theme_void() +
  scale_fill_gradient(
    name = "",
    low = "white",
    high = "darkorange",
    guide = guide_colorbar(reverse = FALSE, title = "Percentage of GPs Covered")
  ) +
  scale_y_continuous(label = abs) +
  scale_x_continuous(label = abs)

## Theme overrides, then the percentage printed as a label on every district.
p <- p +
  theme(
    panel.border = element_rect(colour = "grey", fill = NA, size = 0.1),
    rect = element_blank(),
    panel.grid.major = element_line(color = "white"),
    axis.ticks = element_line()
  ) +
  geom_sf_text_interactive(
    data = karnataka_shp_files_district_response,
    aes(label = Percentage.GPs.Covered, tooltip = DistrictName),
    size = 3
  )

## Wrap the ggplot object as an interactive girafe widget.
g <- girafe(ggobj = p)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
## Interactive block-level table built on the shared data object.
## Row names are switched off, so the seven labels below line up one-to-one
## with the seven columns selected into dat_block.
# rt_block <- reactable(
# shared_dat_block,
# elementId = "ABtable",
# columns = list(
# DistrictName = colDef(name = "District Name"),
# BlockName = colDef(name = "Block Name"),
# Total.GPs.in.the.Block = colDef(name = "Total GPs in the Block"),
# Total.GPs.covered.in.the.Block=colDef(name="Total GPs covered in the Block"),
# Total.surveys.completed.in.the.Block = colDef(name = "Total surveys completed in the Block"),
#
# Percentage.GPs.Covered=colDef(name = "Percentage GPs Covered",format=colFormat(percent = FALSE,digits = 1),style = function(value) {
# if (value < 20) {
# color <- "#white"
# } else if (value >80) {
# color <- "darkorange"
# } else {
# color <- "#ffead2"
# }
# list(color = color, fontWeight = "bold")
# })
# )
# ,showPageSizeOptions=TRUE)
rt_block <- datatable(
  shared_dat_block,
  colnames = c(
    "District Name",
    "Block Name",
    "Total GPs in the Block",
    "Total GPs covered in the Block",
    "Total surveys completed in the Block",
    "Percentage GPs Covered",
    "Division"
  ),
  rownames = FALSE
)
###
# rt_dist <- reactable(
# shared_dat_dist,
# elementId = "ABtableDist",
# columns = list(
# DistrictName = colDef(name = "District Name"),
# Total.GPs.in.the.District = colDef(name = "Total GPs in the District"),
# Total.GPs.covered.in.the.District=colDef(name="Total GPs covered in the District"),
# Total.surveys.completed.in.the.District = colDef(name = "Total surveys completed in the District"),
#
# Percentage.GPs.Covered=colDef(name = "Percentage GPs Covered",format=colFormat(percent = FALSE,digits = 1),style = function(value) {
# if (value < 20) {
# color <- "#white"
# } else if (value >80) {
# color <- "#8b0101"
# } else {
# color <- "#ffead2"
# }
# list(color = color, fontWeight = "bold")
# })
# )
# )
## Define filter
## Two multi-select inputs bound to the shared block table: one on Division,
## one on DistrictName.
fs_div <- filter_select(
  id = "DivFilter",
  label = "Select Division",
  sharedData = shared_dat_block,
  group = ~Division,
  multiple = TRUE
)
fs_dist <- filter_select(
  id = "letterFilter",
  label = "Select District",
  sharedData = shared_dat_block,
  group = ~DistrictName,
  multiple = TRUE
)
## Layout
# bscols(
# list(fs_div,fs_dist, g),
# list(rt_block)
# )
### Row
## Display the block-level table.
rt_block
## Import pdo survey data
library(haven)
library(stringr)
## Cleaned survey responses (Stata file) and the district code table.
## NOTE(review): the district code workbook is read through a relative path
## while every other input uses an absolute one - confirm the working directory.
pdo_survey_data <- read_dta(
  file = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Output/PDO_survey_cleaned.dta"
)
district_codes <- xlsx::read.xlsx(
  file = "../Output/district_codes.xlsx",
  sheetIndex = 1
)
## Districts into division
## source : Wikipedia
## Each list of names is upper-cased and split into individual district names.
## str_split() returns a list, so the objects are flattened with unlist() at
## the point where membership is tested.
bangalore_division <- "BENGALURU, BENGALURU Rural, CHIKKABALLAPURA, Chitradurga, Davanagere, Kolar, Ramanagara, SHIVAMOGGA, Tumakuru"
bangalore_division <- bangalore_division %>%
  toupper() %>%
  str_split(., ", ")
## The next two lists carry a " District" suffix on every entry but the last,
## so splitting on " DISTRICT, " strips the suffix at the same time.
belagavi_divison <- c("BAGALKOTE District, Belagavi District, VIJAYAPURA District, DHARWAR District, Gadag District, Haveri District, Uttara Kannada")
belagavi_divison <- belagavi_divison %>%
  toupper() %>%
  str_split(., " DISTRICT, ")
Kalaburagi_division <- c("BALLARI District, Bidar District, Kalaburagi District, Koppal District, Raichur District, Yadgir District, Vijayanagar")
Kalaburagi_division <- Kalaburagi_division %>%
  toupper() %>%
  str_split(., " DISTRICT, ")
Mysore_division <- c("CHAMARAJANAGARA, CHIKKAMAGALURU, Dakshina Kannada, Hassan, Kodagu, Mandya, MYSURU, Udupi")
Mysore_division <- Mysore_division %>%
  toupper() %>%
  str_split(., ", ")
## Label every district with its division. All branches refer to the
## DistrictName column of the data being mutated; a district that appears in
## none of the four lists gets NA (case_when() has no fallback branch here).
district_codes <- district_codes %>%
  mutate(DivisionName = case_when(
    DistrictName %in% unlist(bangalore_division) ~ "Bangalore",
    DistrictName %in% unlist(belagavi_divison) ~ "Belagavi",
    DistrictName %in% unlist(Kalaburagi_division) ~ "Kalaburagi",
    DistrictName %in% unlist(Mysore_division) ~ "Mysore"
  ))
## Rename the survey's District column to DistrictCode, store it as integer,
## and attach the district table. inner_join() drops responses whose code has
## no match. The key is named explicitly so that a column later shared by both
## tables cannot silently join the condition.
pdo_survey_data <- pdo_survey_data %>%
  rename(DistrictCode = District) %>%
  mutate(DistrictCode = as.integer(DistrictCode)) %>%
  inner_join(district_codes, by = "DistrictCode")
### Remove duplicates
## Make a variable with panchayats - if multiple and single
## The two columns are pasted together without a separator.
## NOTE(review): presumably only one of Panchayat / Panchayat_multiple is
## filled per response - confirm, otherwise the two values run together.
pdo_survey_data$Panchayat_all_multiple <- str_c(
  pdo_survey_data$Panchayat,
  pdo_survey_data$Panchayat_multiple,
  sep = ""
)
## Separate, sort, then concat
## Split on spaces, drop empty tokens, sort, and glue back together so the same
## set of panchayats always yields the same key regardless of entry order.
## vapply() guarantees exactly one string per response; chained sapply() calls
## would collapse to a matrix when every response has the same number (> 1) of
## tokens, and the column assignment below would then fail.
sorted_multiple_panchayats <- vapply(
  str_split(pdo_survey_data$Panchayat_all_multiple, " ", simplify = FALSE),
  function(tokens) paste(sort(tokens[tokens != ""]), collapse = " "),
  character(1)
)
pdo_survey_data$Panchayat_all_multiple <- sorted_multiple_panchayats
## Now clean the end date to find the last survey for each
library(lubridate)
pdo_survey_data <- mutate(pdo_survey_data, endtime_cleaned = ymd_hms(endtime))
### Go at the PDO level for the section questions
### Drop duplicates by Panchayats by taking the last response by endtime
## Rows are ordered newest-first over the whole table, then the first row of
## every panchayat key is kept. The result stays grouped by that key.
pdo_survey_data_pdo_level <- pdo_survey_data %>%
  arrange(desc(endtime_cleaned)) %>%
  group_by(Panchayat_all_multiple) %>%
  slice(1)
## Get value labels
## Convert the three labelled survey columns to factors.
pdo_survey_data_pdo_level <- mutate(
  pdo_survey_data_pdo_level,
  gender = haven::as_factor(gender),
  education = haven::as_factor(education),
  rural = haven::as_factor(rural)
)
## Fix gender levels
## NOTE(review): this overwrites the level names positionally - it assumes the
## gender factor's levels are already in this order; confirm against the data.
levels(pdo_survey_data_pdo_level$gender) <- c("Female", "Male", "Other", "Prefer Not to Say")
Who the PDOs are
##shared data for this section
# karnataka_shp_files_district_response_div <-karnataka_shp_files_district_response_div%>%
# rename(DivisionName=Division)
##Division Plot
# division_shp<-ggplot() +
# geom_sf_interactive(data = karnataka_shp_files_district_response_div,
# aes(fill=DivisionName , data_id = DivisionName ,
# onclick = paste0("filterOn(\"",DivisionName,"\")"),tooltip=DivisionName),
# alpha = .6) + theme_void()+
# scale_color_manual(values=c("darkorange","yellow","lightblue","darkgreen")) +
# scale_y_continuous(label = abs) +
# scale_x_continuous(label = abs) + theme(panel.border = element_rect(colour = "grey", fill=NA, size=0.1),rect = element_blank(),panel.grid.major = element_line(color = "white"),axis.ticks = element_line())
#
# division_shp <- girafe(ggobj =division_shp)
## Percent_rural
## Share of each rural category within every division: one row per
## (division, rural) pair. The count columns are named explicitly instead of
## relying on dplyr renaming the second count to `nn`.
rural_percent <- pdo_survey_data_pdo_level %>%
  ungroup() %>%
  add_count(rural, DivisionName, name = "n_category") %>%
  add_count(DivisionName, name = "n_division") %>%
  mutate(rural_perc = n_category / n_division) %>%
  select(DivisionName, rural_perc, rural) %>%
  distinct()
## Percent_gender
## Share of Female / Male within every division: one row per (division, gender)
## pair. Other gender answers are removed first, so the shares are relative to
## Female + Male only. The count columns are named explicitly instead of
## relying on dplyr renaming the second count to `nn`.
gender_percent <- pdo_survey_data_pdo_level %>%
  ungroup() %>%
  filter(gender %in% c("Female", "Male")) %>%
  add_count(gender, DivisionName, name = "n_category") %>%
  add_count(DivisionName, name = "n_division") %>%
  mutate(gender_perc = n_category / n_division) %>%
  select(DivisionName, gender_perc, gender) %>%
  distinct()
# Percent edu
# Share of each listed education level within every division: one row per
# (division, education) pair. Answers outside the list are removed first, so
# the shares are relative to the listed levels only. The count columns are
# named explicitly instead of relying on dplyr renaming the second count to `nn`.
edu_percent <- pdo_survey_data_pdo_level %>%
  ungroup() %>%
  filter(education %in% c("10th pass", "PUC", "Diploma", "Bachelor’s Degree", "Master’s Degree", "PhD")) %>%
  add_count(education, DivisionName, name = "n_category") %>%
  add_count(DivisionName, name = "n_division") %>%
  mutate(edu_perc = n_category / n_division) %>%
  select(DivisionName, edu_perc, education) %>%
  distinct()
## combine percent data
## The three share tables are joined on DivisionName only, so within a division
## the result holds every education x gender x rural combination. Each single
## share therefore appears on several rows.
percent_pdo_data <- inner_join(edu_percent, gender_percent, by = "DivisionName")
percent_pdo_data <- inner_join(rural_percent, percent_pdo_data, by = "DivisionName")
# ##Division Filter
# ##shared data full survey
# shared_pd_data<- SharedData$new(pdo_survey_data_pdo_level,group = "sec1div")
# shared_percent_data<- SharedData$new(percent_pdo_data,group = "sec1div")
#
# fs_div1<-filter_select("DivFilter2", "Select Division", shared_percent_data, group=~DivisionName, multiple=TRUE)
# Gender plot
# Dodged bars: gender on the x axis, one bar per division.
# percent_pdo_data repeats each (division, gender) share once per education x
# rural combination, so it is reduced to one row per bar before plotting;
# otherwise identical bars are drawn on top of each other.
gender_plot <- ggplot() +
  geom_col(
    data = distinct(percent_pdo_data, DivisionName, gender, gender_perc),
    aes(x = gender, y = gender_perc, fill = DivisionName),
    position = "dodge"
  ) +
  scale_x_discrete(limits = c("Female", "Male")) +
  scale_fill_manual(
    values = c("darkorange", "yellow", "lightblue", "darkgreen"),
    name = "Administrative Division"
  ) +
  labs(y = "Percent", fill = "Division", title = "Gender Distribution") +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() +
  theme(axis.title.x = element_blank())
# Convert to an interactive plotly widget; the tooltip shows the share.
gender_plot <- ggplotly(gender_plot, tooltip = c("gender_perc"))
# Rural Plot
# Dodged bars: rural category on the x axis, one bar per division.
# percent_pdo_data repeats each (division, rural) share once per education x
# gender combination, so it is reduced to one row per bar before plotting;
# otherwise identical bars are drawn on top of each other.
rural_plot <- ggplot() +
  geom_col(
    data = distinct(percent_pdo_data, DivisionName, rural, rural_perc),
    aes(x = rural, y = rural_perc, fill = DivisionName),
    position = "dodge"
  ) +
  scale_fill_manual(
    values = c("darkorange", "yellow", "lightblue", "darkgreen"),
    name = "Administrative Division"
  ) +
  labs(y = "Percent", fill = "Division", title = "Whether a PDO hails from a rural area") +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() +
  theme(axis.title.x = element_blank())
# Convert to an interactive plotly widget; the tooltip shows the share.
rural_plot <- ggplotly(rural_plot, tooltip = c("rural_perc"))
### Age Distribution
## Overlaid, semi-transparent density curves of age, one per division. The x
## axis is limited to 18-50 with a break every 2 years.
age_plot <- ggplot(data = pdo_survey_data_pdo_level, mapping = aes(x = age)) +
  geom_density(
    aes(fill = DivisionName),
    position = "identity",
    alpha = 0.3,
    adjust = 5
  ) +
  scale_x_continuous(breaks = seq(18, 50, 2), limits = c(18, 50)) +
  xlab("Age") +
  scale_y_continuous(labels = scales::percent, name = "Percentage") +
  labs(title = "Age distribution of the PDOs by Division") +
  scale_fill_manual(
    values = c("darkorange", "yellow", "lightblue", "darkgreen"),
    name = "Administrative Division"
  ) +
  theme_bw()
## Convert to an interactive plotly widget.
age_plot <- ggplotly(age_plot, tooltip = c("age"))
## Warning: Removed 903 rows containing non-finite values (stat_density).
## Edu level
## Dodged bars: education level on the x axis, one bar per division.
## percent_pdo_data repeats each (division, education) share once per gender x
## rural combination, so it is reduced to one row per bar before plotting;
## otherwise identical bars are drawn on top of each other.
edu_plot <- ggplot(data = distinct(percent_pdo_data, DivisionName, education, edu_perc)) +
  geom_col(
    aes(x = education, y = edu_perc, fill = DivisionName),
    position = "dodge"
  ) +
  scale_y_continuous(labels = scales::percent, name = "Percentage") +
  labs(title = "Education distribution of the PDOs by Division") +
  scale_fill_manual(
    values = c("darkorange", "yellow", "lightblue", "darkgreen"),
    name = "Administrative Division"
  ) +
  scale_x_discrete(name = "Highest Education Level") +
  theme_bw() +
  # Slanted x labels so the education level names do not overlap.
  theme(axis.text.x = element_text(angle = 320, vjust = 0.5, hjust = 0.01))
## Convert to an interactive plotly widget; the tooltip shows the share.
edu_plot <- ggplotly(edu_plot, tooltip = c("edu_perc"))
Row
Men make up 75% of all PDOs
# Display the interactive gender distribution chart.
gender_plot
The age density peaks at around 35-40 years
# Display the interactive age density chart.
age_plot
Row
Most PDOs have a college degree
# Display the interactive education level chart.
edu_plot
Most PDOs hail from villages
# Display the interactive rural-origin chart.
rural_plot